package ssm.blog.util;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts the {@code src} attribute values of {@code <img>} tags from HTML text.
 *
 * <p>This is a regex-based, best-effort scanner, not an HTML parser: it does not
 * skip HTML comments or script bodies, does not decode character references such
 * as {@code &amp;}, and ignores an {@code <img>} tag that contains an unterminated
 * quote.
 *
 * @program: lyon
 * @author: Lyon
 * @create: 2018-05-30 22:12
 **/
public class getImgSrc {

    /**
     * Matches one complete {@code <img ...>} start tag; group 1 is the raw attribute text.
     * The lookahead rejects longer tag names that merely start with "img". Quoted values are
     * consumed as a unit so a '>' inside them does not end the tag, and the possessive
     * quantifiers keep the scan linear (no backtracking into already-consumed values).
     */
    private static final Pattern IMG_TAG = Pattern.compile(
            "<img(?=[\\s/>])((?:[^>\"']++|\"[^\"]*+\"|'[^']*+')*+)>",
            Pattern.CASE_INSENSITIVE);

    /**
     * Matches a single attribute inside a tag. Group 1 is the attribute name; the value, when
     * present, is in group 2 (double-quoted), group 3 (single-quoted) or group 4 (unquoted).
     */
    private static final Pattern ATTRIBUTE = Pattern.compile(
            "([^\\s\"'>/=]+)(?:\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+)))?");

    /**
     * Collects the image sources referenced by the {@code <img>} tags in the given HTML.
     *
     * <p>Tags are matched case-insensitively and may span several lines. Each tag contributes
     * at most one entry: the value of its first {@code src} attribute that carries a value,
     * with the surrounding quotes removed. Attributes whose name merely ends in "src"
     * (for example {@code data-src}) and {@code src} attributes of other elements are ignored.
     * An explicitly empty value ({@code src=""}) is reported as an empty string.
     *
     * @param htmlStr the HTML text to scan; may be {@code null} or empty
     * @return a new mutable list of {@code src} values in document order; empty (never
     *         {@code null}) when the input is {@code null} or contains no matching tag
     */
    public static List<String> getImgStr(String htmlStr) {
        List<String> pics = new ArrayList<>();
        if (htmlStr == null || htmlStr.isEmpty()) {
            return pics;
        }
        Matcher imgMatcher = IMG_TAG.matcher(htmlStr);
        while (imgMatcher.find()) {
            String src = firstSrcValue(imgMatcher.group(1));
            if (src != null) {
                pics.add(src);
            }
        }
        return pics;
    }

    /**
     * Returns the value of the first {@code src} attribute that has a value in the given
     * attribute text, or {@code null} when there is none.
     */
    private static String firstSrcValue(String attributes) {
        // Walking attribute by attribute (instead of searching for "src=") means that text
        // inside another attribute's quoted value can never be mistaken for a src attribute.
        Matcher attr = ATTRIBUTE.matcher(attributes);
        while (attr.find()) {
            if (!"src".equalsIgnoreCase(attr.group(1))) {
                continue;
            }
            String value = attr.group(2);
            if (value == null) {
                value = attr.group(3);
            }
            if (value == null) {
                value = attr.group(4);
            }
            if (value != null) {
                return value;
            }
        }
        return null;
    }
}
